In [1]:
import os

import pandas as pd
import numpy as np

# Location of the transactions CSV. Set the ONLINE_FRAUD_CSV environment
# variable to point at another copy of the file; when it is unset, the
# original hard-coded location is used so existing runs behave as before.
DATA_PATH = os.environ.get(
    "ONLINE_FRAUD_CSV",
    "C:/Users/DELL/OneDrive/Projects/Online Payment Fraud Detection/onlinefraud.csv",
)
data = pd.read_csv(DATA_PATH)
In [4]:
# Preview the first five rows of the loaded frame.
data.head(n=5)
Out[4]:
| step | type | amount | nameOrig | oldbalanceOrg | newbalanceOrig | nameDest | oldbalanceDest | newbalanceDest | isFraud | isFlaggedFraud | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | PAYMENT | 9839.64 | C1231006815 | 170136.0 | 160296.36 | M1979787155 | 0.0 | 0.0 | 0 | 0 |
| 1 | 1 | PAYMENT | 1864.28 | C1666544295 | 21249.0 | 19384.72 | M2044282225 | 0.0 | 0.0 | 0 | 0 |
| 2 | 1 | TRANSFER | 181.00 | C1305486145 | 181.0 | 0.00 | C553264065 | 0.0 | 0.0 | 1 | 0 |
| 3 | 1 | CASH_OUT | 181.00 | C840083671 | 181.0 | 0.00 | C38997010 | 21182.0 | 0.0 | 1 | 0 |
| 4 | 1 | PAYMENT | 11668.14 | C2048537720 | 41554.0 | 29885.86 | M1230701703 | 0.0 | 0.0 | 0 | 0 |
In [5]:
# Count the missing values in each column.
missing_per_column = data.isna().sum()
print(missing_per_column)
step 0 type 0 amount 0 nameOrig 0 oldbalanceOrg 0 newbalanceOrig 0 nameDest 0 oldbalanceDest 0 newbalanceDest 0 isFraud 0 isFlaggedFraud 0 dtype: int64
In [6]:
# How many transactions there are of each type.
data["type"].value_counts()
Out[6]:
type CASH_OUT 2237500 PAYMENT 2151495 CASH_IN 1399284 TRANSFER 532909 DEBIT 41432 Name: count, dtype: int64
In [7]:
# Per-type transaction counts, stored for reuse by later cells.
# NOTE(review): this name shadows Python's built-in `type`; it is kept
# unchanged here because later cells read it under this name.
type = data.loc[:, "type"].value_counts()
print(type)
type CASH_OUT 2237500 PAYMENT 2151495 CASH_IN 1399284 TRANSFER 532909 DEBIT 41432 Name: count, dtype: int64
In [8]:
# Split the counts into labels (index) and numbers (values) for plotting.
transaction, quantity = type.index, type.values
In [9]:
import plotly.express as px

# Donut chart (hole=0.5) showing each transaction type's share of the
# counts held in `quantity`, labelled by `transaction`.
figure = px.pie(
    data,
    values=quantity,
    names=transaction,
    hole=0.5,
    title="Distribution of transaction type",
)
figure.show()
In [10]:
# Pairwise correlation between the float64/int64 columns only;
# other columns are excluded by the dtype filter.
numeric_dtypes = ["float64", "int64"]
numeric_cols = data.select_dtypes(include=numeric_dtypes)
correlation = numeric_cols.corr()
print(correlation)
step amount oldbalanceOrg newbalanceOrig \
step 1.000000 0.022373 -0.010058 -0.010299
amount 0.022373 1.000000 -0.002762 -0.007861
oldbalanceOrg -0.010058 -0.002762 1.000000 0.998803
newbalanceOrig -0.010299 -0.007861 0.998803 1.000000
oldbalanceDest 0.027665 0.294137 0.066243 0.067812
newbalanceDest 0.025888 0.459304 0.042029 0.041837
isFraud 0.031578 0.076688 0.010154 -0.008148
isFlaggedFraud 0.003277 0.012295 0.003835 0.003776
oldbalanceDest newbalanceDest isFraud isFlaggedFraud
step 0.027665 0.025888 0.031578 0.003277
amount 0.294137 0.459304 0.076688 0.012295
oldbalanceOrg 0.066243 0.042029 0.010154 0.003835
newbalanceOrig 0.067812 0.041837 -0.008148 0.003776
oldbalanceDest 1.000000 0.976569 -0.005885 -0.000513
newbalanceDest 0.976569 1.000000 0.000535 -0.000529
isFraud -0.005885 0.000535 1.000000 0.044109
isFlaggedFraud -0.000513 -0.000529 0.044109 1.000000
In [11]:
# Rank every numeric column by its correlation with isFraud, strongest first.
correlation.loc[:, "isFraud"].sort_values(ascending=False)
Out[11]:
isFraud 1.000000 amount 0.076688 isFlaggedFraud 0.044109 step 0.031578 oldbalanceOrg 0.010154 newbalanceDest 0.000535 oldbalanceDest -0.005885 newbalanceOrig -0.008148 Name: isFraud, dtype: float64
In [12]:
# Encode each transaction type label as an integer code.
type_codes = {"CASH_OUT": 1, "PAYMENT": 2, "CASH_IN": 3, "TRANSFER": 4, "DEBIT": 5}
# Only encode while the column still holds labels. Mapping an already-encoded
# column finds no matching keys and would turn every value into NaN, so the
# guard makes this cell safe to re-run.
if not pd.api.types.is_numeric_dtype(data["type"]):
    data["type"] = data["type"].map(type_codes)
In [13]:
# Replace the 0/1 fraud flag with readable class labels.
fraud_labels = {0: "no fraud", 1: "fraud"}
# Only relabel while the column is still numeric. Mapping the string labels
# a second time would find no matching keys and produce NaN, so the guard
# makes this cell safe to re-run.
if pd.api.types.is_numeric_dtype(data["isFraud"]):
    data["isFraud"] = data["isFraud"].map(fraud_labels)
data.head(5)
Out[13]:
| step | type | amount | nameOrig | oldbalanceOrg | newbalanceOrig | nameDest | oldbalanceDest | newbalanceDest | isFraud | isFlaggedFraud | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2 | 9839.64 | C1231006815 | 170136.0 | 160296.36 | M1979787155 | 0.0 | 0.0 | no fraud | 0 |
| 1 | 1 | 2 | 1864.28 | C1666544295 | 21249.0 | 19384.72 | M2044282225 | 0.0 | 0.0 | no fraud | 0 |
| 2 | 1 | 4 | 181.00 | C1305486145 | 181.0 | 0.00 | C553264065 | 0.0 | 0.0 | fraud | 0 |
| 3 | 1 | 1 | 181.00 | C840083671 | 181.0 | 0.00 | C38997010 | 21182.0 | 0.0 | fraud | 0 |
| 4 | 1 | 2 | 11668.14 | C2048537720 | 41554.0 | 29885.86 | M1230701703 | 0.0 | 0.0 | no fraud | 0 |
In [14]:
# Build the model inputs: four feature columns and the isFraud target.
from sklearn.model_selection import train_test_split

feature_columns = ["type", "amount", "oldbalanceOrg", "newbalanceOrig"]
target_columns = ["isFraud"]
x = np.array(data[feature_columns])
# Selecting with a list keeps y two-dimensional (one column), as before.
y = np.array(data[target_columns])
In [15]:
from sklearn.tree import DecisionTreeClassifier
In [20]:
# Hold out 10% of the rows for evaluation; the fixed seed keeps the split stable.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.10, random_state=42)

# Seed the tree as well: without random_state the fitted tree, and therefore
# the score below, can differ from run to run even on the same split.
model = DecisionTreeClassifier(random_state=42)
model.fit(xtrain, ytrain)

# Mean accuracy on the held-out rows.
# NOTE(review): if one class is rare, accuracy alone may overstate model
# quality — consider checking per-class metrics as well.
print(model.score(xtest, ytest))
0.9997359578286932
In [22]:
# Classify one hand-written transaction. Values follow the training column
# order: type (4 = TRANSFER), amount, oldbalanceOrg, newbalanceOrig.
features = np.array([[4, 9000.0, 9000.0, 0.0]])
prediction = model.predict(features)
print(prediction)
['fraud']
In [ ]: